library(tidyverse)
library(plotly)
library(highcharter)
library(xts)
library(rgdal)
## Loading required package: sp
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-2, (SVN revision 1183)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.5.3, released 2022/10/21
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rgdal/gdal
## GDAL does not use iconv for recoding strings.
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 9.1.0, September 1st, 2022, [PJ_VERSION: 910]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.5-1
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(leaflet)
##
## Attaching package: 'leaflet'
## The following object is masked from 'package:xts':
##
## addLegend
library(leaflet.extras)
First, read in the dataset
url_2012 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2012.csv"
rat_2012 = read_csv(url(url_2012))
url_2013 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2013.csv"
rat_2013 = read_csv(url(url_2013))
url_2014 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2014.csv"
rat_2014 = read_csv(url(url_2014))
url_2015 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2015.csv"
rat_2015 = read_csv(url(url_2015))
url_2016 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2016.csv"
rat_2016 = read_csv(url(url_2016))
url_2017 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2017.csv"
rat_2017 = read_csv(url(url_2017))
url_2018 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2018.csv"
rat_2018 = read_csv(url(url_2018))
url_2019 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2019.csv"
rat_2019 = read_csv(url(url_2019))
url_2020 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2020.csv"
rat_2020 = read_csv(url(url_2020))
url_2021 = "https://raw.githubusercontent.com/YijiaJiang/p8105_final_project_data/main/rat_2021.csv"
rat_2021 = read_csv(url(url_2021))
rat = bind_rows(rat_2012, rat_2013, rat_2014, rat_2015, rat_2016, rat_2017, rat_2018, rat_2019, rat_2020, rat_2021) %>%
select(-...1) %>%
mutate(inspection_month_n = recode(inspection_month,
"Jan" = 1,
"Feb" = 2,
"Mar" = 3,
"Apr" = 4,
"May" = 5,
"Jun" = 6,
"Jul" = 7,
"Aug" = 8,
"Sep" = 9,
"Oct" = 10,
"Nov" = 11,
"Dec" = 12)) %>%
mutate(date = paste(inspection_year, inspection_month_n, inspection_day, sep = "-"))%>%
mutate(date = as.Date(date,format = "%Y-%m-%d"))
Time series
by_date = rat %>%
group_by(date) %>%
summarise(Total = n())
time_series = xts(by_date$Total , order.by= by_date$date)
hchart(time_series, name = "Rat Sightings") %>%
#hc_add_theme(hc_theme_sandsignika()) %>%
hc_credits(enabled = TRUE, text = "Sources: City of New York", style = list(fontSize = "12px")) %>%
hc_title(text = "Time Series of NYC Rat Sightings") %>%
hc_legend(enabled = TRUE)
Plot 1: Number of Rat Inspections by year
rat %>%
mutate(inspection_year = as.factor(inspection_year)) %>%
group_by(inspection_year) %>%
summarise(n_obs = n()) %>%
plot_ly(x = ~inspection_year, y = ~n_obs, type = "scatter", mode = "lines+markers") %>%
layout(xaxis = list(title = "Inspection Year"),
yaxis = list(title = "Number of Inspections"),
title = "Number of Rat Inspections by Year")
Plot 2: Number of Rat Inspections by Month
rat %>%
mutate(inspection_month = as.factor(inspection_month)) %>%
mutate(inspection_month = inspection_month %>%
fct_relevel("Jan", "Feb", "Mar","Apr","May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) %>%
group_by(inspection_year, inspection_month) %>%
summarise(n_obs = n()) %>%
plot_ly(x = ~inspection_month, y = ~n_obs, type = "box",
color = ~inspection_month) %>%
layout(xaxis = list(title = "Inspection Month"),
yaxis = list(title = "Number of Inspections"),
title = "Number of Rat Inspections by Month") %>%
hide_legend()
## `summarise()` has grouped output by 'inspection_year'. You can
## override using the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
Plot 3: Number of Rat Inspections Throughout a Day in 2021
rat_2021 %>%
mutate(inspection_day = as.factor(inspection_day)) %>%
mutate(inspection_month = as.factor(inspection_month)) %>%
mutate(inspection_month = inspection_month %>%
fct_relevel("Jan", "Feb", "Mar","Apr","May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) %>%
separate(inspection_time, into = c("hour", "minute", "second")) %>%
group_by(inspection_month, inspection_day, hour) %>%
summarise(n_obs = n()) %>%
plot_ly(x = ~hour, y = ~n_obs, type = "scatter",
color = ~inspection_day,
frame = ~inspection_month) %>%
layout(xaxis = list(title = "Time", range = list(0,24), dtick = 3,
tickvals = c(0, 3, 6, 9, 12, 15, 18, 21, 24),
ticktext = c("12am", "3am", "6am", "9am", "12pm", "3pm", "6pm", "9pm", "12am")),
yaxis = list(title = "Number of Inspections"),
title = "Number of Rat Inspections Throughout a Day in 2021") %>%
hide_legend()
## `summarise()` has grouped output by 'inspection_month',
## 'inspection_day'. You can override using the `.groups` argument.
## No scatter mode specifed: Setting the mode to markers Read more
## about this attribute ->
## https://plotly.com/r/reference/#scatter-mode
Plot 4: Number of Rat Inspections by borough
rat %>%
mutate(borough = as.factor(borough)) %>%
group_by(inspection_year, borough) %>%
summarise(n_obs = n()) %>%
plot_ly(x = ~borough, y = ~n_obs, color = ~borough, frame = ~inspection_year, type = "bar") %>%
layout(xaxis = list(title = "Borough"),
yaxis = list(title = "Number of Inspections"),
title = "Number of Rat Inspections by Borough") %>%
hide_legend()
## `summarise()` has grouped output by 'inspection_year'. You can
## override using the `.groups` argument.
Plot 5: inspection type
colors = c("#38C5A3", "#F09968", "#D2959B", "#B499CB", "#EE90BA", "#A8D14F", "#E5D800", "#FCCD4C", "#E2BF96", "#B3B3B3")
rat %>%
mutate(inspection_type = case_when(inspection_type == "Initial" ~ "Initial Inspection",
inspection_type == "BAIT" ~ "Baiting",
inspection_type == "Compliance"~ "Compliance Inspection",
inspection_type == "STOPPAGE" ~ "Stoppage",
inspection_type == "CLEAN_UPS" ~ "Clean Up")) %>%
group_by(inspection_type) %>%
summarise(n_obs = n(),
prop = n_obs/nrow(rat)) %>%
arrange(-prop) %>%
mutate(inspection_type = fct_reorder(inspection_type, -prop)) %>%
plot_ly(labels = ~inspection_type, values = ~prop, type = "pie",
insidetextfont = list(color = "#FFFFFF"),
marker = list(colors = colors,
line = list(color = '#FFFFFF', width = 1))
) %>%
layout(title = "Distribution of Inspection Type",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
Plot 6: inspection results
colors = c("#38C5A3", "#F09968", "#D2959B", "#B499CB", "#EE90BA", "#A8D14F", "#E5D800", "#FCCD4C", "#E2BF96", "#B3B3B3")
rat %>%
drop_na() %>%
group_by(result) %>%
summarise(n_obs = n(),
prop = n_obs/nrow(rat)) %>%
arrange(-prop) %>%
mutate(result = fct_reorder(result, -prop)) %>%
plot_ly(labels = ~result, values = ~prop, type = "pie",
insidetextfont = list(color = "#FFFFFF"),
marker = list(colors = colors,
line = list(color = '#FFFFFF', width = 1))
) %>%
layout(title = "Distribution of Inspection Results",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
Plot 7: Association with Weather
rat_n = rat %>%
group_by(inspection_year, inspection_month, inspection_day) %>%
summarise(n_obs = n())
## `summarise()` has grouped output by 'inspection_year',
## 'inspection_month'. You can override using the `.groups`
## argument.
t = rat %>%
mutate(t_avg = (tmin+tmax)/2) %>%
distinct(inspection_year, inspection_month, inspection_day, t_avg, tmax, tmin)
rat_t = left_join(rat_n, t, by = c("inspection_year", "inspection_month", "inspection_day")) %>%
as.data.frame()
rat_t %>%
plot_ly(x = ~tmin, y = ~tmax, z = ~n_obs, type = "heatmap") %>%
layout(xaxis = list(title = "Minimum Temperature"),
yaxis = list(title = "Maximum Temperature"),
title = "Association Between Number of Rat Inspections and Weather") %>%
hide_legend()
Plot 8: Association with Covid-19
rat %>%
filter(inspection_year %in% c(2019, 2020)) %>%
mutate(inspection_year = as.factor(inspection_year)) %>%
mutate(inspection_month = as.factor(inspection_month)) %>%
mutate(inspection_month = inspection_month %>%
fct_relevel("Jan", "Feb", "Mar","Apr","May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) %>%
group_by(inspection_year, inspection_month) %>%
summarise(n_obs = n()) %>%
plot_ly(x = ~inspection_month, y = ~n_obs, type = "scatter", mode = "lines",
color = ~inspection_year) %>%
layout(xaxis = list(title = "Inspection Month"),
yaxis = list(title = "Number of Inspections"),
title = "Number of Rat Inspections Before and After COVID-19")
## `summarise()` has grouped output by 'inspection_year'. You can
## override using the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
Plot 9: map
top = 40.917577 # north lat
left = -74.259090 # west long
right = -73.700272 # east long
bottom = 40.477399 # south lat
nyc = rat_2021 %>%
mutate(bbl = paste(boro_code, block, lot)) %>%
filter(latitude >= bottom) %>%
filter (latitude <= top) %>%
filter(longitude >= left ) %>%
filter(longitude <= right)
center_lon = median(nyc$longitude,na.rm = TRUE)
center_lat = median(nyc$latitude,na.rm = TRUE)
count = rat_2021 %>%
mutate(bbl = paste(boro_code, block, lot)) %>%
group_by(bbl) %>%
count()
nyc = merge(nyc, count, by = "bbl")
factpal = colorFactor("blue", nyc$n)
nyc %>%
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addHeatmap(lng = ~longitude, lat = ~latitude, intensity = ~(nyc$n), blur = 20, max = 0.05, radius = 15) %>%
setView(lng=center_lon, lat=center_lat,zoom = 10)